# Load the HCMST couples data.
# The location can be overridden with the HCMST_DATA_PATH environment
# variable so the script is not tied to one machine; the original absolute
# path is kept as the default.
data_path <- Sys.getenv(
  "HCMST_DATA_PATH",
  unset = "/Users/sooyeonchoi/Documents/MA_QMSS/Spring2024/GR5063_DV/HCMST_couples.rds"
)
# Fail early with a readable message instead of a low-level readRDS() error.
if (!file.exists(data_path)) {
  stop("HCMST data file not found: ", data_path, call. = FALSE)
}
data <- readRDS(data_path)
# Preview the first rows to check that the columns loaded as expected.
head(data)
## CaseID CASEID_NEW qflag weight1 weight1_freqwt weight2 weight1a
## 1 11 2936569 Qualified 0.7752 60727 NA 0.7708
## 2 28 1946291 Qualified 1.6552 129664 NA 1.6457
## 3 37 2106585 Qualified 1.2118 94929 NA 1.2048
## 4 38 2963787 Qualified 1.0422 81643 NA 1.0362
## 5 42 2962195 Qualified NA NA 0.8783 NA
## 6 78 2278413 Qualified 0.9795 76732 NA 0.9739
## weight1a_freqwt weight_combo weight_combo_freqwt duration
## 1 60383 0.8715875 60497 8
## 2 128920 1.8608868 129164 16
## 3 94381 1.3623360 94560 120
## 4 81173 1.1716905 81327 240
## 5 NA 0.2721683 18891 7
## 6 76293 1.1012443 76438 6
## speed_flag consent xlgb
## 1 Completed survey in over 2 minutes Yes, I agree to participate gen pop
## 2 Completed survey in over 2 minutes Yes, I agree to participate gen pop
## 3 Completed survey in over 2 minutes Yes, I agree to participate gen pop
## 4 Completed survey in over 2 minutes Yes, I agree to participate gen pop
## 5 Completed survey in over 2 minutes Yes, I agree to participate LGB sample
## 6 Completed survey in over 2 minutes Yes, I agree to participate gen pop
## S1
## 1 Yes, I am Married
## 2 No, I am not Married
## 3 No, I am not Married
## 4 Yes, I am Married
## 5 No, I am not Married
## 6 No, I am not Married
## S2
## 1 <NA>
## 2 No, I am single, with no boyfriend, no girlfriend and no romantic or sexual partner
## 3 Yes, I have a sexual partner (boyfriend or girlfriend)
## 4 <NA>
## 5 No, I am single, with no boyfriend, no girlfriend and no romantic or sexual partner
## 6 Yes, I have a sexual partner (boyfriend or girlfriend)
## S3 DOV_Branch Q3_Refused Q4 Q5
## 1 <NA> 1 <NA> [Partner Name] is Male <NA>
## 2 Yes 2 <NA> <NA> <NA>
## 3 <NA> 1 <NA> [Partner Name] is Male <NA>
## 4 <NA> 1 <NA> [Partner Name] is Female <NA>
## 5 Yes 2 <NA> <NA> <NA>
## 6 <NA> 1 <NA> [Partner Name] is Female <NA>
## Q6A Q6B Q9 Q10
## 1 No (Not Latino or Hispanic) White 65 Professional or Doctorate degree
## 2 <NA> <NA> NA <NA>
## 3 No (Not Latino or Hispanic) White 22 11th grade
## 4 No (Not Latino or Hispanic) White 26 HS graduate or GED
## 5 <NA> <NA> NA <NA>
## 6 Yes, Puerto Rican White 29 Associate degree
## Q11 Q12 Q14
## 1 HS graduate or GED Leans Democrat HS graduate or GED
## 2 <NA> <NA> <NA>
## 3 11th grade Undecided/Independent/Other HS graduate or GED
## 4 HS graduate or GED Undecided/Independent/Other HS graduate or GED
## 5 <NA> <NA> <NA>
## 6 HS graduate or GED Leans Republican Associate degree
## Q15A7
## 1 I met [Partner Name] in [Answer in Q15A6]
## 2 <NA>
## 3 I met [Partner Name] in [Answer in Q15A6]
## 4 I met [Partner Name] somewhere else, Please specify the Country, State, and city or town:
## 5 <NA>
## 6 I met [Partner Name] in [Answer in Q15A6]
## Q16 Q16_Refused Q17A Q17B
## 1 2 <NA> Once (this is my first marriage) <NA>
## 2 NA <NA> <NA> <NA>
## 3 5 <NA> <NA> Once
## 4 5 <NA> Once (this is my first marriage) <NA>
## 5 NA <NA> <NA> <NA>
## 6 4 <NA> <NA> Never married
## Q17C Q17D
## 1 I am sexually attracted only to men <NA>
## 2 <NA> <NA>
## 3 I am sexually attracted only to men <NA>
## 4 <NA> I am sexually attracted only to women
## 5 <NA> <NA>
## 6 <NA> I am sexually attracted only to women
## Q19 Q20 Q21A_Year Q21A_Month Q21B_Year Q21B_Month Q21C_Year Q21C_Month
## 1 Yes <NA> 1967 May 1967 June 1970 August
## 2 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 No Yes 2010 April 2010 April 2012 September
## 4 Yes <NA> 2006 October 2014 Refused 2014 March
## 5 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 Yes <NA> 2004 January 2004 January 2014 January
## Q21D_Year Q21D_Month w6_identity w6_outness w6_outness_timing
## 1 1970 August heterosexual or straight <NA> <NA>
## 2 <NA> <NA> <NA> <NA> <NA>
## 3 <NA> <NA> heterosexual or straight <NA> <NA>
## 4 2014 July heterosexual or straight <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> heterosexual or straight <NA> <NA>
## Q23 Q24_Refused Q25
## 1 [Partner Name] earned more <NA> Same High School
## 2 <NA> <NA> <NA>
## 3 I earned more <NA> Same High School
## 4 [Partner Name] was not working for pay <NA> Different High School
## 5 <NA> <NA> <NA>
## 6 I earned more <NA> Different High School
## Q26 Q27 Q28 w6_friend_connect_1
## 1 Did not attend same college or university Yes No Yes
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> No No Yes
## 4 <NA> No No No
## 5 <NA> <NA> <NA> <NA>
## 6 Did not attend same college or university No No Yes
## w6_friend_connect_2 w6_friend_connect_3 w6_friend_connect_4
## 1 Yes Yes No
## 2 <NA> <NA> <NA>
## 3 Yes Yes No
## 4 No No Yes
## 5 <NA> <NA> <NA>
## 6 Yes Yes No
## w6_friend_connect_Refused
## 1 No
## 2 <NA>
## 3 No
## 4 No
## 5 <NA>
## 6 No
## Q32 Q34 Q35_Refused
## 1 No, I did NOT meet [Partner Name] through the Internet Excellent <NA>
## 2 <NA> <NA> <NA>
## 3 No, I did NOT meet [Partner Name] through the Internet Fair <NA>
## 4 No, I did NOT meet [Partner Name] through the Internet Excellent <NA>
## 5 <NA> <NA> <NA>
## 6 No, I did NOT meet [Partner Name] through the Internet Good <NA>
## w6_sex_frequency
## 1 2 to 3 times a month
## 2 <NA>
## 3 3 to 6 times a week
## 4 Once or twice a week
## 5 <NA>
## 6 Once or twice a week
## w6_otherdate
## 1 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 2 <NA>
## 3 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 4 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 5 <NA>
## 6 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## w6_how_many w6_how_meet_Refused w6_otherdate_app w6_how_many_app
## 1 <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA>
## Past_Partner_Q1 w6_relationship_end_nonmar w6_breakup_nonmar
## 1 <NA> <NA> <NA>
## 2 Yes <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 No We broke up We both equally wanted to break up
## 6 <NA> <NA> <NA>
## w6_relationship_end_mar w6_who_breakup
## 1 <NA> <NA>
## 2 divorce We both equally wanted the [divorce/separation] .
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
## Q5_2 Q6A_2 Q9B_2
## 1 <NA> <NA> NA
## 2 <NA> Yes, Other Latino/Hispanic 1992
## 3 <NA> <NA> NA
## 4 <NA> <NA> NA
## 5 Yes, we were a same-sex couple No (Not Latino or Hispanic) 1985
## 6 <NA> <NA> NA
## Q10_2 Q11_2 Q12_2
## 1 <NA> <NA> <NA>
## 2 HS graduate or GED No formal education Not Strong Democrat
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 Bachelor’s degree HS graduate or GED Undecided/Independent/Other
## 6 <NA> <NA> <NA>
## Q14_2 Q15A7_2_1 Q16_2
## 1 <NA> <NA> NA
## 2 HS graduate or GED I met [Partner Name] in [answer in Q15A7_2] 2
## 3 <NA> <NA> NA
## 4 <NA> <NA> NA
## 5 Bachelor’s degree I met [Partner Name] in [answer in Q15A7_2] 4
## 6 <NA> <NA> NA
## Q16_2_Codes Q17B_2 Q17C_2
## 1 <NA> <NA> <NA>
## 2 <NA> Once <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> Never married I am equally sexually attracted to men and women
## 6 <NA> <NA> <NA>
## Q17D_2 Q20_2 Q21A_2_Year Q21A_2_Month
## 1 <NA> <NA> <NA> <NA>
## 2 I am sexually attracted only to women Yes 2005 May
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> No 2004 January
## 6 <NA> <NA> <NA> <NA>
## Q21B_2_Year Q21B_2_Month Q21C_2_Year Q21C_2_Month Q21D_2_Year Q21D_2_Month
## 1 <NA> <NA> <NA> <NA> <NA> <NA>
## 2 2009 February 2011 April 2015 September
## 3 <NA> <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA> <NA>
## 5 2007 March <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA> <NA>
## Q21E_2_Year Q21E_2_Month Q21F_2_start_range Q21F_2_Year Q21F_2_Month
## 1 <NA> <NA> <NA> <NA> <NA>
## 2 2017 February Q21D_2 <NA> <NA>
## 3 <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA>
## 5 2008 July Q21B_2 <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## w6_identity_2 w6_outness_2 w6_outness_timing_2
## 1 <NA> <NA> <NA>
## 2 heterosexual or straight <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 bisexual None of them <NA>
## 6 <NA> <NA> <NA>
## Q23_2 Q25_2
## 1 <NA> <NA>
## 2 I earned more Different High School
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 We earned about the same amount Same High School
## 6 <NA> <NA>
## Q26_2 Q27_2 Q28_2 w6_friend_connect_2_1
## 1 <NA> <NA> <NA> <NA>
## 2 <NA> Yes No Yes
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 Did not attend same college or university Yes No No
## 6 <NA> <NA> <NA> <NA>
## w6_friend_connect_2_2 w6_friend_connect_2_3 w6_friend_connect_2_4
## 1 <NA> <NA> <NA>
## 2 No No No
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 Yes Yes No
## 6 <NA> <NA> <NA>
## w6_friend_connect_2_Refused
## 1 <NA>
## 2 No
## 3 <NA>
## 4 <NA>
## 5 No
## 6 <NA>
## Q32_2
## 1 <NA>
## 2 No, I did NOT meet [Partner Name] through the Internet
## 3 <NA>
## 4 <NA>
## 5 No, I did NOT meet [Partner Name] through the Internet
## 6 <NA>
## w6_otherdate_2
## 1 <NA>
## 2 No, I have not met anyone for dating, romance, or sex in the past year
## 3 <NA>
## 4 <NA>
## 5 No, I have not met anyone for dating, romance, or sex in the past year
## 6 <NA>
## w6_how_many_2 w6_otherdate_app_2 w6_how_many_app_2 partyid7
## 1 <NA> <NA> <NA> Strong Democrat
## 2 <NA> <NA> <NA> Strong Democrat
## 3 <NA> <NA> <NA> Leans Republican
## 4 <NA> <NA> <NA> Not Strong Republican
## 5 <NA> <NA> <NA> Not Strong Democrat
## 6 <NA> <NA> <NA> Leans Republican
## PERSNET_hom ppc10017 ppc21310 ppp20071
## 1 Yes Yes Every day Not asked
## 2 Yes Yes Every day No
## 3 Yes Yes Every day No
## 4 Yes Yes Every day No
## 5 Yes Yes Every day Not asked
## 6 Yes Yes At least once a week but not every day No
## ppp20072 ppage ppagecat ppagect4 ppeduc
## 1 Never 65 65-74 60+ Associate degree
## 2 Never 27 25-34 18-29 Some college, no degree
## 3 A few times a year 23 18-24 18-29 High school graduate
## 4 Never 26 25-34 18-29 High school graduate
## 5 A few times a year 26 25-34 18-29 Some college, no degree
## 6 A few times a year 31 25-34 30-44 Associate degree
## ppeducat ppethm ppgender pphhhead pphhsize
## 1 Some college White, Non-Hispanic Female Yes 2
## 2 Some college Hispanic Male Yes 1
## 3 High school White, Non-Hispanic Female Yes 2
## 4 High school White, Non-Hispanic Male Yes 6
## 5 Some college White, Non-Hispanic Female No 5
## 6 Some college White, Non-Hispanic Male Yes 3
## pphouse ppincimp
## 1 A one-family house attached to one or more houses $60,000 to $74,999
## 2 A building with 2 or more apartments $40,000 to $49,999
## 3 A one-family house detached from any other house $25,000 to $29,999
## 4 A mobile home $15,000 to $19,999
## 5 A one-family house detached from any other house $60,000 to $74,999
## 6 A one-family house detached from any other house $100,000 to $124,999
## ppmarit ppmsacat PPREG4 ppreg9
## 1 Married Non-Metro Midwest West-North Central
## 2 Never married Metro Northeast Mid-Atlantic
## 3 Divorced Non-Metro South South Atlantic
## 4 Married Metro West Mountain
## 5 Never married Metro West Pacific
## 6 Living with partner Metro Northeast Mid-Atlantic
## pprent PPT01 PPT25 PPT612
## 1 Owned or being bought by you or someone in your household 0 0 0
## 2 Rented for cash 0 0 0
## 3 Owned or being bought by you or someone in your household 0 0 0
## 4 Rented for cash 1 1 2
## 5 Owned or being bought by you or someone in your household 0 0 0
## 6 Owned or being bought by you or someone in your household 0 0 0
## PPT1317 PPT18OV ppwork Race_1 Race_2 Race_3 Race_4
## 1 0 2 Not working - retired Yes No No No
## 2 0 1 Working - as a paid employee Yes No No No
## 3 0 2 Working - as a paid employee Yes No No No
## 4 0 2 Working - as a paid employee Yes No No No
## 5 0 5 Working - as a paid employee Yes No No No
## 6 0 3 Working - as a paid employee Yes No No No
## Race_5 Race_6 race1 race2 race3 race4 race5 race6 race7 race8 race9 race10
## 1 No No 1 0 0 0 0 0 0 0 0 0
## 2 No No 1 0 0 0 0 0 0 0 0 0
## 3 No No 1 0 0 0 0 0 0 0 0 0
## 4 No No 1 0 0 0 0 0 0 0 0 0
## 5 No Yes 1 0 0 0 0 0 0 0 0 0
## 6 No No 1 0 0 0 0 0 0 0 0 0
## race11 race12 race13 race14 race15 w6_took_the_survey w6_prior_identity_lgb
## 1 0 0 0 0 0 took the survey straight/ non LGB
## 2 0 0 0 0 0 took the survey straight/ non LGB
## 3 0 0 0 0 0 took the survey straight/ non LGB
## 4 0 0 0 0 0 took the survey straight/ non LGB
## 5 0 0 0 0 0 took the survey LGB
## 6 0 0 0 0 0 took the survey straight/ non LGB
## w6_same_sex_couple w6_same_sex_couple_gender w6_q4
## 1 Heterosexual Couples hetero couple [Partner Name] is Male
## 2 Heterosexual Couples hetero couple [Partner Name] is Female
## 3 Heterosexual Couples hetero couple [Partner Name] is Male
## 4 Heterosexual Couples hetero couple [Partner Name] is Female
## 5 Same Sex Couples lesbian couple [Partner Name] is Female
## 6 Heterosexual Couples hetero couple [Partner Name] is Female
## w6_q5 w6_q6a
## 1 <NA> No (Not Latino or Hispanic)
## 2 <NA> Yes, Other Latino/Hispanic
## 3 <NA> No (Not Latino or Hispanic)
## 4 <NA> No (Not Latino or Hispanic)
## 5 Yes, we are a same-sex couple No (Not Latino or Hispanic)
## 6 <NA> Yes, Puerto Rican
## w6_q6b w6_q9 w6_q10
## 1 White 65 Professional or Doctorate degree
## 2 White 25 High school graduate
## 3 White 22 Never graduated high school
## 4 White 26 High school graduate
## 5 Other (please specify) 32 Bachelor’s degree
## 6 White 29 Associate degree
## w6_q11 w6_q12 w6_q14
## 1 HS graduate or GED Leans Democrat HS graduate or GED
## 2 No formal education Not Strong Democrat HS graduate or GED
## 3 11th grade Undecided/Independent/Other HS graduate or GED
## 4 HS graduate or GED Undecided/Independent/Other HS graduate or GED
## 5 HS graduate or GED Undecided/Independent/Other Bachelor’s degree
## 6 HS graduate or GED Leans Republican Associate degree
## w6_q15a1_truncated w6_q15a4_truncated
## 1 United States United States
## 2 United States United States
## 3 United States United States
## 4 United States United States
## 5 Another country, please specify Another country, please specify
## 6 United States United States
## w6_q15a7
## 1 I met [Partner Name] in [Answer in Q15A6]
## 2 I met [Partner Name] in [Answer in Q15A6]
## 3 I met [Partner Name] in [Answer in Q15A6]
## 4 I met [Partner Name] somewhere else, Please specify the Country, State, and city or town:
## 5 I met [Partner Name] in [Answer in Q15A6]
## 6 I met [Partner Name] in [Answer in Q15A6]
## w6_q16 w6_q17 w6_attraction w6_q19 w6_q20
## 1 2 1 sexually attracted only to opposite gender Yes <NA>
## 2 2 1 sexually attracted only to opposite gender <NA> Yes
## 3 5 1 sexually attracted only to opposite gender No Yes
## 4 5 1 sexually attracted only to opposite gender Yes <NA>
## 5 4 0 sexually attracted to men and women equally <NA> No
## 6 4 0 sexually attracted only to opposite gender Yes <NA>
## w6_q21a_year w6_q21a_month w6_q21a_month_flag w6_q21b_year w6_q21b_month
## 1 1967 May no 1967 June
## 2 2005 May no 2009 February
## 3 2010 April no 2010 April
## 4 2006 October no 2014 August
## 5 2004 January no 2007 March
## 6 2004 January no 2004 January
## w6_q21b_month_flag w6_q21c_year w6_q21c_month w6_q21c_month_flag w6_q21d_year
## 1 0 1970 August NA 1970
## 2 0 2011 April NA 2015
## 3 0 2012 September NA NA
## 4 1 2014 March NA 2014
## 5 0 NA <NA> NA NA
## 6 0 2014 January NA NA
## w6_q21d_month w6_q21e_year w6_q21e_month w6_q21f_year w6_q21f_month
## 1 August NA <NA> NA <NA>
## 2 September 2017 February NA <NA>
## 3 <NA> NA <NA> NA <NA>
## 4 July NA <NA> NA <NA>
## 5 <NA> 2008 July NA <NA>
## 6 <NA> NA <NA> NA <NA>
## w6_identity_all w6_outness_all w6_outness_timing_all
## 1 heterosexual or straight <NA> NA
## 2 heterosexual or straight <NA> NA
## 3 heterosexual or straight <NA> NA
## 4 heterosexual or straight <NA> NA
## 5 bisexual None of them NA
## 6 heterosexual or straight <NA> NA
## w6_q23 w6_q24_length w6_q25
## 1 [Partner Name] earned more 143 Same High School
## 2 I earned more 172 Different High School
## 3 I earned more 205 Same High School
## 4 [Partner Name] was not working for pay 312 Different High School
## 5 We earned about the same amount 92 Same High School
## 6 I earned more 86 Different High School
## w6_q26 w6_q27 w6_q28 w6_friend_connect_1_all w6_friend_connect_2_all
## 1 2 Yes No yes yes
## 2 NA Yes No yes no
## 3 NA No No yes yes
## 4 NA No No no no
## 5 2 Yes No no yes
## 6 2 No No yes yes
## w6_friend_connect_3_all w6_friend_connect_4_all
## 1 yes no
## 2 no no
## 3 yes no
## 4 no yes
## 5 yes no
## 6 yes no
## w6_q32 w6_q34
## 1 No, I did NOT meet [Partner Name] through the Internet Excellent
## 2 No, I did NOT meet [Partner Name] through the Internet <NA>
## 3 No, I did NOT meet [Partner Name] through the Internet Fair
## 4 No, I did NOT meet [Partner Name] through the Internet Excellent
## 5 No, I did NOT meet [Partner Name] through the Internet <NA>
## 6 No, I did NOT meet [Partner Name] through the Internet Good
## w6_otherdate_all
## 1 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 2 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 3 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 4 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 5 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## 6 No, I have not met anyone for dating, romance, or sex besides [Partner Name] in the past year.
## w6_how_many_all w6_otherdate_app_all w6_how_many_app_all w6_number_people_met
## 1 NA NA NA 0
## 2 NA NA NA 0
## 3 NA NA NA 0
## 4 NA NA NA 0
## 5 NA NA NA 0
## 6 NA NA NA 0
## w6_otherdate_dichotomous w6_married relate_duration_at_w6_years
## 1 no yes 50.083332
## 2 no no NA
## 3 no no 7.250000
## 4 no yes 2.916667
## 5 no no NA
## 6 no no 13.500000
## w6_number_people_met_app weight_combo_v2 partnership_status
## 1 NA 0.8534415 married
## 2 NA 1.8221443 unpartnered, has had past partner
## 3 NA 1.3339731 partnered, not married
## 4 NA 1.1472965 married
## 5 NA 0.3829862 unpartnered, has had past partner
## 6 NA 1.0783170 partnered, not married
## female year_fraction_met year_fraction_relstart age_when_met
## 1 1 1967.375 1967.458 15
## 2 0 2005.375 2009.125 15
## 3 1 2010.292 2010.292 16
## 4 0 2006.792 2014.625 15
## 5 1 2004.042 2007.208 13
## 6 0 2004.042 2004.042 18
## time_from_met_to_rel year_fraction_first_cohab time_from_rel_to_cohab
## 1 0.08337402 1970.625 3.166626
## 2 3.75000000 2011.292 2.166626
## 3 0.00000000 2012.708 2.416748
## 4 7.83337402 2014.208 0.000000
## 5 3.16674805 NA NA
## 6 0.00000000 2014.042 10.000000
## R_cowork R_friend R_family R_sig_other R_neighbor P_cowork P_friend P_family
## 1 no no no no no no no no
## 2 no yes no no no no yes no
## 3 no no no no no no no no
## 4 no no no no no no yes no
## 5 no no no no no no no no
## 6 no no no no no no no no
## P_sig_other P_neighbor btwn_I_cowork btwn_I_friend btwn_I_family
## 1 no no no no no
## 2 no no no no no
## 3 no no no no no
## 4 no no no no yes
## 5 no no no no no
## 6 no no no no no
## btwn_I_sig_other btwn_I_neighbor hcm2017_q24_length summary_all_codes
## 1 no no 143 2
## 2 no no 172 4
## 3 no no 205 2
## 4 no no 312 4
## 5 no no 92 2
## 6 no no 86 2
## w6_relationship_quality met_through_family met_through_friend
## 1 excellent no no
## 2 <NA> no yes
## 3 fair no no
## 4 excellent no yes
## 5 <NA> no no
## 6 good no no
## met_through_as_nghbrs met_as_through_cowork w6_subject_race interracial_5cat
## 1 no no 1 no
## 2 no no 1 no
## 3 no no 1 no
## 4 no no 1 no
## 5 no no 1 yes
## 6 no no 1 no
## partner_mother_yrsed subject_mother_yrsed partner_yrsed subject_yrsed
## 1 12 12 20 14
## 2 0 12 12 13
## 3 11 12 11 12
## 4 12 12 12 12
## 5 12 16 16 13
## 6 12 14 14 14
## meeting_type value
## 1 Primary or Secondary School yes
## 2 Primary or Secondary School yes
## 3 Primary or Secondary School yes
## 4 Primary or Secondary School yes
## 5 Primary or Secondary School yes
## 6 Primary or Secondary School yes
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.2.3
library(gapminder)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Collapse the detailed first-meeting categories into five broad groups.
#
# meeting_type: character vector or factor of detailed meeting-type labels
#   (a single value works too, so existing sapply()/vapply() callers are
#   unaffected).
# Returns a character vector of the same length whose values are "School",
#   "Online", "Social Gathering", "Work-related" or "Other". Labels that are
#   not listed below, and missing values, map to "Other".
simplify_meeting_type <- function(meeting_type) {
  groups <- list(
    "School" = c("Primary or Secondary School", "College"),
    "Online" = c(
      "Met Online", "Internet Social Network", "Internet", "Internet Site",
      "Online Gaming", "Internet Dating or Phone App", "Internet Chat",
      "Online"
    ),
    "Social Gathering" = c(
      "Volunteer Organization", "One-time Service Interaction",
      "Private Party", "Church", "Bar or Restaurant"
    ),
    "Work-related" = c("Work Neighbors", "Business Trip")
  )
  labels <- unlist(groups, use.names = FALSE)
  group_of_label <- rep(names(groups), lengths(groups))
  # Compare case-insensitively: the data spell the school category
  # "Primary or Secondary School", which an exact match against
  # "Primary or Secondary school" silently sent to "Other".
  hit <- match(tolower(as.character(meeting_type)), tolower(labels))
  simplified <- group_of_label[hit]
  # Unlisted labels and NA have no match and fall into the catch-all group.
  simplified[is.na(simplified)] <- "Other"
  simplified
}
# add a column holding the simplified version of meeting_type
# (vapply guarantees exactly one character label per respondent)
data$meeting_type_simplified <- vapply(
  data$meeting_type,
  simplify_meeting_type,
  character(1),
  USE.NAMES = FALSE
)
# keep only respondents with a recorded Q21A_Year
data <- data[!is.na(data$Q21A_Year), ]
# convert Q21A_Year to a numeric variable; go through as.character() so that
# the labels, not internal factor codes, are converted. Labels that are not
# numbers become NA (with a coercion warning).
data$Q21A_Year <- as.numeric(as.character(data$Q21A_Year))
## Warning: NAs introduced by coercion
# calculate frequency of meeting types over time
meeting_type_freq <- data %>%
  # years that could not be converted to a number are NA; drop them here so
  # the plots do not have to discard rows with a missing x value
  filter(!is.na(Q21A_Year)) %>%
  group_by(Q21A_Year, meeting_type_simplified) %>%
  # .groups = "drop" returns an ungrouped result without the grouping message
  summarise(count = n(), .groups = "drop")
## `summarise()` has grouped output by 'Q21A_Year'. You can override using the
## `.groups` argument.
# line plot: yearly count of first meetings, one coloured line per meeting type
ggplot(
  data = meeting_type_freq,
  mapping = aes(x = Q21A_Year, y = count, colour = meeting_type_simplified)
) +
  geom_line() +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "Frequency of Meeting Types Over Time",
    x = "year",
    y = "frequency",
    colour = "meeting type"
  ) +
  theme_minimal()
## Warning: Removed 5 rows containing missing values (`geom_line()`).
# area plot: the same yearly counts, stacked so the total height is the sum
# over meeting types
ggplot(
  data = meeting_type_freq,
  mapping = aes(x = Q21A_Year, y = count, fill = meeting_type_simplified)
) +
  geom_area(position = "stack") +
  theme_minimal() +
  labs(
    title = "Frequency of Meeting Types Over Years",
    x = "year",
    y = "frequency",
    fill = "meeting type"
  )
## Warning: Removed 5 rows containing non-finite values (`stat_align()`).
I initially created a line plot to capture trends over time and then transformed it into an area plot to display cumulative frequency stacked over time. I recommend using the area plot as it better emphasizes the overall distribution and composition of how the modes of meeting have changed over time, while the line plot is effective for showing individual trends.
In both plots, I used different colors to distinguish between different meeting types. I tried to ensure that the colors are easily distinguishable. For the line plot, I employed predefined color palettes to ensure better color distinction and aesthetic appeal. To effectively communicate the content of the plots, I provided titles and labeled the x-axis as “year” and the y-axis as “frequency”.
#2. Age is just a number
# scatter plot of respondent age (ppage) against partner age (Q9), coloured by
# respondent gender, with one linear fit per gender and a text note
ggplot(data = data, mapping = aes(x = ppage, y = Q9, colour = ppgender)) +
  geom_point(shape = 21, size = 2) +
  geom_smooth(method = lm) +
  annotate(
    geom = "text",
    x = 35,
    y = 80,
    label = "Positive correlation \nbetween ages \n for both genders",
    colour = "black",
    size = 3,
    fontface = "bold"
  ) +
  labs(
    title = "Correlation of Ages by Gender",
    x = "Respondent's Age",
    y = "Partner's Age",
    colour = "Gender"
  )
## `geom_smooth()` using formula = 'y ~ x'
#3. Politics and Dating
# cross-tabulate respondent party identification against simplified meeting
# type and reshape the table into one row per combination
meeting_type_df <- as.data.frame(
  table(data$partyid7, data$meeting_type_simplified)
)
names(meeting_type_df) <- c("Party", "Meeting_Type", "Count")
# keep only the combinations that actually occur
meeting_type_df <- subset(meeting_type_df, Count != 0)
# stacked bars: one bar per party, segments are meeting types
ggplot(
  data = meeting_type_df,
  mapping = aes(x = Party, y = Count, fill = Meeting_Type)
) +
  geom_col(position = "stack") +
  labs(
    title = "Meeting Type Distribution by Political Affiliation",
    x = "Respondent's Political Affiliation",
    y = "Count",
    fill = "meeting type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Mean relationship length for every pairing of respondent party (partyid7)
# and partner party (w6_q12).
# NOTE(review): the earlier version tabulated `duration` and then plotted raw
# counts, so the duration never reached the chart and several rows per
# party pairing were drawn on top of each other. `duration` sits next to
# `speed_flag` in the data and looks like survey completion time rather than
# relationship length -- confirm against the codebook.
# `relate_duration_at_w6_years` is used here instead.
relationship_dur_df <- data %>%
  filter(
    !is.na(partyid7),
    !is.na(w6_q12),
    !is.na(relate_duration_at_w6_years)
  ) %>%
  group_by(Party = partyid7, Partner_Party = w6_q12) %>%
  summarise(
    Mean_Duration = mean(relate_duration_at_w6_years),
    # number of couples behind each mean, kept for reference
    Count = n(),
    .groups = "drop"
  )
# one dodged bar per partner party within each respondent party
ggplot(
  relationship_dur_df,
  aes(x = Party, y = Mean_Duration, fill = Partner_Party)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Relationship Duration by Political Affiliation",
    x = "Respondent's Political Affiliation",
    y = "Mean relationship duration (years)",
    fill = "Partner's Political Affiliation"
  ) +
  theme_minimal() +
  # "Dark1" is not a ColorBrewer palette; "Dark2" is the qualitative dark
  # palette (up to 8 colours -- assumes w6_q12 has at most 8 levels)
  scale_fill_brewer(palette = "Dark2") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in pal_name(palette, type): Unknown palette Dark1
Initially, I created a bar chart to demonstrate how the mode of meeting varies depending on the respondent’s political affiliation. However, this visualization does not seem to effectively illustrate the relative proportions of the different categories. Then, I generated a new data frame with the relationship duration and their political status. I recommend using the second plot as it has better visualization, capturing the relationship between the duration of the couples’ relationships and their political affiliation.
#4. Your turn to choose
# tabulate the answer categories of Q10 before grouping them
summary(data[["Q10"]])
## Refused No formal education
## 3 4
## 1st-4th grade 5th or 6th grade
## 2 7
## 7th or 8th grade 9th grade
## 7 8
## 10th grade 11th grade
## 10 15
## 12th grade no diploma HS graduate or GED
## 62 550
## Some college, no degree Associate degree
## 620 286
## Bachelor’s degree Master’s degree
## 762 386
## Professional or Doctorate degree
## 160
# Q10: highest level of schooling that the partner has completed
# Collapse the detailed Q10 answers into broad education groups.
#
# Q10: character vector or factor of Q10 answer labels (a single value works
#   too, so existing sapply()/vapply() callers are unaffected).
# Returns a character vector of the same length whose values are
#   "no formal education", "some basic education", "highschool",
#   "some college", "BA", "MA", "PHD" or "Other". Labels that are not listed
#   below, and missing values, map to "Other".
simplify_Q10 <- function(Q10) {
  groups <- list(
    # NOTE(review): "Refused" is grouped with "no formal education", as
    # before, although a refusal says nothing about schooling -- consider
    # moving it to "Other".
    "no formal education" = c("Refused", "No formal education"),
    # "9th grade" was missing from this list and fell into "Other"
    "some basic education" = c(
      "1st-4th grade", "5th or 6th grade", "7th or 8th grade", "9th grade",
      "10th grade", "11th grade", "12th grade no diploma"
    ),
    "highschool" = c("HS graduate or GED"),
    "some college" = c("Some college, no degree", "Associate degree"),
    "BA" = c("Bachelor’s degree"),
    "MA" = c("Master’s degree"),
    "PHD" = c("Professional or Doctorate degree")
  )
  labels <- unlist(groups, use.names = FALSE)
  group_of_label <- rep(names(groups), lengths(groups))
  # Compare case-insensitively: the data label is "Some college, no degree",
  # which an exact match against "some college, no degree" sent to "Other".
  hit <- match(tolower(as.character(Q10)), tolower(labels))
  simplified <- group_of_label[hit]
  # Unlisted labels and NA have no match and fall into the catch-all group.
  simplified[is.na(simplified)] <- "Other"
  simplified
}
# add a column with the simplified partner education
# (vapply guarantees exactly one character label per respondent); store it as
# a factor whose levels run from least to most schooling so that plots show
# the groups in that order instead of alphabetically
data$edu_simplified <- factor(
  vapply(data$Q10, simplify_Q10, character(1), USE.NAMES = FALSE),
  levels = c(
    "no formal education", "some basic education", "highschool",
    "some college", "BA", "MA", "PHD", "Other"
  )
)
# bar chart: number of respondents per gender, with one dodged bar for each
# simplified partner-education group
ggplot(
  data = data,
  mapping = aes(x = ppgender, fill = factor(edu_simplified))
) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Gender and Partner's Academic Background",
    x = "Gender",
    y = "Count",
    fill = "partner's last education"
  )
# box plot: respondent age (ppage) within each simplified partner-education
# group, split by respondent gender; axes are flipped so the groups read
# top to bottom
ggplot(
  data = data,
  mapping = aes(x = edu_simplified, y = ppage, fill = ppgender)
) +
  geom_boxplot() +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Partner's Academic Background by Age and Gender",
    x = "Partner's Last Education",
    y = "Age",
    fill = "Gender"
  )
I became intrigued by the impact of a partner’s academic background and generated a bar chart to explore potential gender disparities in the education levels completed by respondents’ partners. Then, I created a box plot that incorporates age data alongside gender. I recommend using the box plot since it not only offers a more comprehensive representation but also better highlights gender differences in partners’ academic backgrounds.
#5. Make two plots interactive
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# rebuild the age scatter plot as an object so it can be handed to plotly:
# semi-transparent points coloured by gender, black linear fits, legend below
ages_scatter <- ggplot(
  data = data,
  mapping = aes(x = ppage, y = Q9, colour = ppgender)
) +
  geom_point(shape = 21, size = 2, alpha = 0.6) +
  geom_smooth(method = lm, colour = "black") +
  theme(legend.position = "bottom") +
  labs(
    title = "Correlation of Ages by Gender",
    x = "Respondent's Age",
    y = "Partner's Age",
    colour = "Gender"
  )
# convert to an interactive widget, limiting the hover text to the three
# requested entries
ggplotly(p = ages_scatter, tooltip = c("ppage", "Q9", "ppgender"))
## `geom_smooth()` using formula = 'y ~ x'
# rebuild the education box plot as an object so it can be handed to plotly;
# the legend title is blanked by the final theme() call
edu_box <- ggplot(
  data = data,
  mapping = aes(x = edu_simplified, y = ppage, fill = ppgender)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Partner's Educational Status by Age and Gender",
    x = "partner's highest completed education",
    y = "Age",
    fill = "gender"
  ) +
  theme_minimal() +
  theme(legend.title = element_blank())
# convert to an interactive widget with the default hover text
ggplotly(p = edu_box)
Interactivity enables readers to explore the data points in detail, providing exact values or labels. It allows readers to compare variables more effectively. For example, in the scatter plot, readers can hover over different points to compare respondent ages with their partners’ across gender categories. Also, interactive plots elevate the presentation of findings. For instance, the simplified interactive box plot captivates readers with its presentation, rendering results more engaging and memorable.
#6. Data Table
# pick the eight columns shown in the data table and give them readable
# headers (same order as the selected columns)
new_data <- setNames(
  data[, c(
    "ppage", "ppgender", "Q9", "Q4", "Q32", "Q34",
    "w6_sex_frequency", "w6_otherdate"
  )],
  c(
    "age", "gender", "partner's age", "partner's gender", "met online",
    "relationship quality", "sex frequency", "experience of cheating"
  )
)
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
# interactive table: no row names, per-column filters above the header, and
# the search box labelled "Filter:"
datatable(
  new_data,
  rownames = FALSE,
  filter = list(position = "top"),
  options = list(language = list(sSearch = "Filter:"))
)
I incorporated demographic information on both respondents and their partners, specifically gender and age. I also included data on whether the respondents initially met their partners online. Then, I delved into aspects exploring the quality of their relationships, such as direct descriptions from respondents about their relationship quality, frequency of sexual activity, and experiences of infidelity within the past year. This information may serve as valuable insight for examining potential correlations between the platform of initial engagement and the subsequent quality of the relationships.